library(dplyr)
library(stringr)
library(ggplot2)
library(scales)
library(ggrepel)
library(ggpubr)
library(stringr)

# Make sure the output directory exists before anything is written to it.
# dir.exists() is used instead of file.exists() so that a regular file that
# happens to be called "output" is not mistaken for the directory.
if (!dir.exists("output")) {
  dir.create("output")
}

# Bubble plot displaying associations between alcohol-related signatures and the interaction between tobacco and alcohol

# Signatures shown in the figure; this order is reused for the factor levels
signatures <- c("SBS16", "DBS4", "ID11")

# Load the regression results and derive the columns needed for plotting.
# Columns read from the CSV: signature, independent_vars, OR, p_adj.
bubbleplot.int <- read.csv("output/Supplementary_Table_13.csv") %>%
  # keep only the "<signature>_cat" rows for the signatures of interest
  filter(signature %in% paste0(signatures, "_cat")) %>%
  # keep only terms whose name contains "tob_alc"
  filter(grepl("tob_alc", independent_vars)) %>%
  # strip the "_cat" suffix and fix the display order of the signatures
  mutate(
    signature = factor(
      str_remove(signature, "_cat"),
      levels = signatures
    )
  ) %>%
  # strip the "tob_alc" part of the term name; the remainder is presumably
  # one of the three labels below (verify against the table)
  mutate(
    independent_vars = factor(
      str_remove(independent_vars, "tob_alc"),
      levels = c("Alcohol", "Tobacco", "Tobacco + alcohol")
    )
  ) %>%
  # effect size and significance on log scales, plus a significance label
  mutate(
    log_OR = log2(OR),
    p_adj_log = -log10(p_adj),
    signif = ifelse(p_adj < 0.05, "p adj < 0.05", "p adj > 0.05")
  )

# Bubble plot: one point per signature (x) and exposure category (y).
# Colour encodes log2(OR), point size encodes -log10(adjusted p), and the
# point shape separates significant from non-significant associations.
fig_5c <- ggplot(bubbleplot.int, aes(
  y = independent_vars, x = signature,
  color = log_OR, size = p_adj_log, shape = signif
)) +
  geom_point(stroke = 0.75) +
  scale_x_discrete(limits = signatures) +
  # Diverging colour scale centred on log2(OR) = 0 (i.e. OR = 1)
  scale_color_gradient2(
    high = "#ca0020", mid = "white", low = "#0571b0",
    midpoint = 0, limits = c(-1, 7.1), breaks = c(0, 2, 4, 6)
  ) +
  # Named values pin each label to its shape. Unnamed values are assigned by
  # position among the levels present, so if no association were significant
  # the "p adj > 0.05" label would silently take the filled shape (19).
  scale_shape_manual(values = c("p adj < 0.05" = 19, "p adj > 0.05" = 21)) +
  labs(
    title = "Associations with tobacco and alcohol",
    subtitle = "Corrected for sex, age of diagnosis, subsite, \nand region",
    x = "", y = "", color = "log2(OR)", size = "-log10(p adj)", shape = ""
  ) +
  theme_bw() +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold", size = 12),
    plot.subtitle = element_text(size = 9),
    axis.text = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.text = element_text(size = 12),
    panel.grid = element_blank()
  )

# Display the figure (auto-printed when evaluated at top level)
fig_5c

# Save the figure, passing the plot object explicitly instead of relying on
# last_plot(); the file name carries the current date.
ggsave(
  filename = paste0("output/Figure_5c_", Sys.Date(), ".pdf"),
  plot = fig_5c,
  device = "pdf",
  width = 6.5, height = 3.5, dpi = 700
)
